"""
1) Extract data from Orbis and AnaCredit
2) Apply classification rule
-- the rule is applied on both data sources separately
3) Export firm data

"""

import pandas as pd
import pyodbc

path_data = r'P:\ECB business areas\DGR\Staff\papouts\Research\Active\Firm Heterogeneities\Final datasets'

sql_conn = pyodbc.connect('DSN=DISC DP Impala 64bit',autocommit=True)

query = """

select  entty_riad_id, 
        entty_riad_cd_ac as entty_riad_cd, 
        cntry, 
        entrprs_sz_le_corr, 
        entrprs_sz_orb_corr, 
        entrprs_sz_le,
        case when entrprs_sz_le_corr = -99 then (case when entrprs_sz_orb_corr = -99 or entrprs_sz_orb_corr is null then entrprs_sz_le else entrprs_sz_orb_corr end)
             else entrprs_sz_le_corr
        end as sz_f,
        nmbr_empl_orb_flag,
        totalassets_orb_flag,
        turnover_orb_flag,
        numberofemployees, 
        totalassets, 
        operatingrevenueturnover,
        entrprs_sz_le_flag,
        nmbr_emplys_le_flag,
        blnc_sht_ttl_le_flag,
        annl_trnvr_le_flag,
        nmbr_emplys_le_c,
        blnc_sht_ttl_le_c,
        annl_trnvr_le_c         
        
from (

SELECT  
        kk.entty_riad_id,
        kk.entty_riad_cd as entty_riad_cd_ac,
        rr.entty_riad_cd as entty_riad_cd_orb,
        kk.cntry, rr.cntry as cntry_orb,
        entrprs_sz_le,
        entrprs_sz_le_corr,
        entrprs_sz_orb_corr,
        nmbr_empl_orb_flag,
        totalassets_orb_flag,
        turnover_orb_flag,
        numberofemployees, 
        totalassets, 
        operatingrevenueturnover,
        entrprs_sz_le_flag,
        nmbr_emplys_le_flag,
        blnc_sht_ttl_le_flag,
        annl_trnvr_le_flag,
        nmbr_emplys_le_c,
        blnc_sht_ttl_le_c,
        annl_trnvr_le_c               
        

FROM (

SELECT DISTINCT
                cntry, 
                entty_riad_id,
                entty_riad_cd,
                entrprs_sz_le,
                entrprs_sz_le_corr,
                nmbr_emplys_le_c,
                blnc_sht_ttl_le_c,
                annl_trnvr_le_c,
                entrprs_sz_le_flag,
                nmbr_emplys_le_flag,
                blnc_sht_ttl_le_flag,
                annl_trnvr_le_flag
FROM lab_org_dgr_fir.ac_dbtr_201912_kk
 
 ) kk
 
 LEFT JOIN (  select distinct entty_riad_cd, cntry, 
                            CASE WHEN r1.nmbr_empl_orb_flag = 1 
                                                                THEN (CASE WHEN (r1.totalassets_orb_flag = 1 OR r1.turnover_orb_flag = 1)
                                                                                 
                                                                                                                                          THEN (
                                                                                                                                                   CASE WHEN r1.numberofemployees < 10 AND (r1.totalassets <= 2000000 OR r1.operatingrevenueturnover <= 2000000) THEN 4
                                                                                                                                                        WHEN r1.numberofemployees < 50 AND (r1.totalassets <= 10000000 OR r1.operatingrevenueturnover <= 10000000) THEN 3
                                                                                                                                                        WHEN r1.numberofemployees < 250 AND (r1.totalassets <= 43000000 OR r1.operatingrevenueturnover <= 50000000) THEN 2
                                                                                                                                                        ELSE 1 END
                                                                                                                                                 ) 
                      
                                                                        WHEN r1.totalassets_orb_flag = 0 AND 
                                                                             r1.turnover_orb_flag = 0         THEN (
                                                                                                                       CASE WHEN r1.numberofemployees < 10 THEN 4
                                                                                                                            WHEN r1.numberofemployees >= 10 AND r1.numberofemployees < 50 THEN 3
                                                                                                                            WHEN r1.numberofemployees >= 50 AND r1.numberofemployees < 250 THEN 2
                                                                                                                            ELSE 1 END  
                                                                                                                     )
                                                                        ELSE NULL END)
                           
                         WHEN r1.nmbr_empl_orb_flag = 0 AND
                             r1.totalassets_orb_flag = 1 AND 
                              r1.turnover_orb_flag = 0          THEN (CASE WHEN r1.totalassets <= 2000000 THEN 4
                                                                             WHEN r1.totalassets <= 10000000 THEN 3
                                                                             WHEN r1.totalassets <= 43000000 THEN 2
                                                                             ELSE 1 END) 
                                                                           
                                                                         
                          WHEN r1.nmbr_empl_orb_flag = 0 AND
                             r1.totalassets_orb_flag = 0 AND 
                              r1.turnover_orb_flag = 1          THEN (CASE WHEN r1.operatingrevenueturnover <= 2000000 THEN 4
                                                                             WHEN r1.operatingrevenueturnover <= 10000000 THEN 3
                                                                             WHEN r1.operatingrevenueturnover <= 50000000 THEN 2
                                                                             ELSE 1 END)
                              
                          
                          WHEN r1.nmbr_empl_orb_flag = 0 AND
                             r1.totalassets_orb_flag = 1 AND 
                              r1.turnover_orb_flag = 1          THEN (CASE WHEN ((r1.totalassets - r1.operatingrevenueturnover) >= 0 AND r1.totalassets <= 2000000) OR ((r1.totalassets - r1.operatingrevenueturnover) < 0 AND r1.operatingrevenueturnover <= 2000000) THEN 4
                                                                             WHEN ((r1.totalassets - r1.operatingrevenueturnover) >= 0 AND r1.totalassets <= 10000000) OR ((r1.totalassets - r1.operatingrevenueturnover) < 0 AND r1.operatingrevenueturnover <= 10000000) THEN 3
                                                                             WHEN ((r1.totalassets - r1.operatingrevenueturnover) >= 0 AND r1.totalassets <= 43000000) OR ((r1.totalassets - r1.operatingrevenueturnover) < 0 AND r1.operatingrevenueturnover <= 50000000) THEN 2
                                                                             ELSE 1 END)             
                                              
                          ELSE -99 END AS entrprs_sz_orb_corr,
                          nmbr_empl_orb_flag,
                          totalassets_orb_flag,
                          turnover_orb_flag,
                          numberofemployees, 
                          totalassets, 
                          operatingrevenueturnover                          
                
                  from  lab_org_dgr_fir.orbis2_dt_201912_KK r1
                  where 
                        filing_flag = filingtype_pref
                        and cons_flag = cons_flag_min
                        and cast(closingdate as int)= max_date
                        and audit_flag = audit_flag_min
                        and accounting_flag = accounting_flag_min
            ) rr
            
ON kk.entty_riad_cd = rr.entty_riad_cd and kk.cntry = rr.cntry

) f
where (case when entrprs_sz_le_corr = -99 then (case when entrprs_sz_orb_corr = -99 or entrprs_sz_orb_corr is null then entrprs_sz_le else entrprs_sz_orb_corr end)
             else entrprs_sz_le_corr end) in (1,2,3,4)

                """
                
dd = pd.read_sql(query, sql_conn)
sql_conn.close()

# exclude all firms that have more than 1 size variable since it is ambiguous information
redundant = dd[['cntry', 'entty_riad_id', 'sz_f']].groupby(['cntry', 'entty_riad_id']).count().reset_index()
print(redundant[redundant.sz_f > 1].entty_riad_id.nunique()) # 1 660
single = redundant[redundant.sz_f == 1].drop(['sz_f'], axis=1)
dd_clean = dd.merge(single, how='inner', on=['cntry', 'entty_riad_id'])

# save results
dd_clean.to_stata(path_data + r'\dbtr_combined_dt.dta', write_index=False)



